On cherche à étudier l’effet de trois facteurs sur le transcriptome des racines d’Arabidopsis thaliana et de la micro Tomate.
load(paste0("./GenesCO2_", specie, ".RData"))
# quantification file
data <- read.csv("quantifFiles/QuantifGenes.csv", h = T, sep = ",")
rownames(data) <- data$Gene
genes = which(!(grepl("__", rownames(data))))
not_quant = data[which((grepl("__", rownames(data)))), ]
data = data[genes, grepl("R", colnames(data))]
keep <- rowSums(data) >= 10
data <- data[keep, ]
group <- sapply(colnames(data), getLabel, with.rep = F)
colnames(data) <- sapply(colnames(data), getLabel)
specie = "At"
clusteredGenes <- clustering(sharedBy3, data)****************************************
coseq analysis: Poisson approach & none transformation
K = 2 to 12
Use set.seed() prior to running coseq for reproducible results.
****************************************
Running g = 2 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0"
Running g = 3 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0"
Running g = 4 ...
[1] "Initialization: 1"
[1] "Log-like diff: 9.85664883046411e-11"
Running g = 5 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0"
Running g = 6 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0"
Running g = 7 ...
[1] "Initialization: 1"
[1] "Log-like diff: 5.98119243022666e-08"
Running g = 8 ...
[1] "Initialization: 1"
[1] "Log-like diff: 5.9811952723976e-08"
Running g = 9 ...
[1] "Initialization: 1"
[1] "Log-like diff: 5.96855898038484e-13"
Running g = 10 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0"
Running g = 11 ...
[1] "Initialization: 1"
[1] "Log-like diff: 8.95155949365289e-09"
Running g = 12 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0"
$ICL
$profiles
$boxplots
$probapost_barplots
*************************************************
Model: Poisson
Transformation: none
*************************************************
Clusters fit: 2,3,4,5,6,7,8,9,10,11,12
Clusters with errors: ---
Selected number of clusters via ICL: 12
ICL of selected model: -740101.6
*************************************************
Number of clusters = 12
ICL = -740101.6
*************************************************
Cluster sizes:
Cluster 1 Cluster 2 Cluster 3 Cluster 4 Cluster 5 Cluster 6 Cluster 7
22 12 13 4 9 12 9
Cluster 8 Cluster 9 Cluster 10 Cluster 11 Cluster 12
6 15 9 2 18
Number of observations with MAP > 0.90 (% of total):
131 (100%)
Number of observations with MAP > 0.90 per cluster (% of total per cluster):
Cluster 1 Cluster 2 Cluster 3 Cluster 4 Cluster 5 Cluster 6 Cluster 7
22 12 13 4 9 12 9
(100%) (100%) (100%) (100%) (100%) (100%) (100%)
Cluster 8 Cluster 9 Cluster 10 Cluster 11 Cluster 12
6 15 9 2 18
(100%) (100%) (100%) (100%) (100%)
a$cluster <- clusteredGenes[a$ensembl_gene_id]
entrezID <- list()
nb_clust = max(clusteredGenes)
for (clust in seq(1:nb_clust)) {
# print(entrez[entrez$cluster == clust,]$ensembl_transcript_id)
entrezID[[length(entrezID) + 1]] <- na.omit(a[a$cluster == clust, ]$entrezgene_id)
}
names(entrezID) <- as.character(seq(1:nb_clust))
ck <- compareCluster(geneCluster = entrezID, fun = "enrichGO", OrgDb = org.At.tair.db,
ont = "BP", pAdjustMethod = "BH", pvalueCutoff = 0.01, qvalueCutoff = 0.05)
clusterProfiler::dotplot(ck, x = ~Cluster)# On essaie un autre clustering avec lka librarie MPLN mpln (dataset =
# as.matrix(data[sharedBy3,])) beaucoup trop long, même mutlithreadé, c'est n'impModel-Based Clustering Using MPLN (Parallelized) Description Performs clustering using mixtures of multivariate Poisson-log normal (MPLN) distribution and model selection using AIC, AIC3, BIC and ICL. Since each component/cluster size (G) is independent from another, all Gs in the range to be tested have been parallelized to run on a seperate core using the parallel R package.
Class: pca dudi
Call: dudi.pca(df = log(data + 0.1), center = TRUE, scale = TRUE, scannf = FALSE,
nf = 4)
Total inertia: 24
Eigenvalues:
Ax1 Ax2 Ax3 Ax4 Ax5
17.2837 4.7082 0.6320 0.5028 0.2617
Projected inertia (%):
Ax1 Ax2 Ax3 Ax4 Ax5
72.015 19.617 2.633 2.095 1.090
Cumulative projected inertia (%):
Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
72.02 91.63 94.27 96.36 97.45
(Only 5 dimensions (out of 24) are shown)
NULL
load("./normalized.count_At.RData")
log.data <- log2(normalized.count[sharedBy3, ] + 1)
Norm.interest.corr <- corr.test(t(log.data), method = "pearson", ci = F)
Norm.interest.corr$p[lower.tri(Norm.interest.corr$p, diag = -TRUE)] = NA
Pval.adj <- as.data.frame(as.table(Norm.interest.corr$p))
Norm.interest.corr$r[lower.tri(Norm.interest.corr$r, diag = TRUE)] = NA
Correlation <- as.data.frame(as.table(Norm.interest.corr$r))
Cor.table <- na.exclude(cbind(Correlation, Pval.adj))[, c(1, 2, 3, 6)]
colnames(Cor.table) <- c("gene1", "gene2", "cor", "p.adj")
Cor.table.filt <- Cor.table[(abs(Cor.table[, 3]) > 0.9 & Cor.table[, 4] < 0.01),
]
g <- graph.data.frame(Cor.table.filt[, 1:2], directed = -FALSE)
V(g)$color <- clusteredGenes[V(g)]
degree <- degree(g)
hist(degree, breaks = 30)Node_nw_st <- data.frame(degree, betweenness)
plot.igraph(g, vertex.size = 5, vertex.label.cex = 0.3, color = clusteredGenes)library(d3r)
data_json <- d3_igraph(g)
write(data_json, "data.json")
write.table(Cor.table.filt, "GraphCO2.txt", sep = "\t", row.names = F, quote = F)
Rank_stat <- rowMeans(cbind(rank(Node_nw_st[, 1]), rank(Node_nw_st[, 2])))
Node_nw_st <- cbind(Node_nw_st, Rank_stat)
write.table(Node_nw_st, file = "StatsCO2.txt", sep = "\t", col.names = NA, quote = F)****************************************
coseq analysis: Poisson approach & none transformation
K = 2 to 12
Use set.seed() prior to running coseq for reproducible results.
****************************************
Running g = 2 ...
[1] "Initialization: 1"
[1] "Log-like diff: 4.59017712728382e-10"
Running g = 3 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0.0857992577874072"
[1] "Log-like diff: 0.0273270790755795"
[1] "Log-like diff: 0.00026883443707959"
[1] "Log-like diff: 3.19062152343008e-06"
Running g = 4 ...
[1] "Initialization: 1"
[1] "Log-like diff: 1.21945280540103e-07"
Running g = 5 ...
[1] "Initialization: 1"
[1] "Log-like diff: 2.64516266668124e-08"
Running g = 6 ...
[1] "Initialization: 1"
[1] "Log-like diff: 4.55134596677453e-10"
Running g = 7 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0.000462489060510052"
[1] "Log-like diff: 0.000242301219245178"
[1] "Log-like diff: 4.77362100070877e-05"
[1] "Log-like diff: 1.09183634933174e-05"
[1] "Log-like diff: 2.0185365059433e-06"
Running g = 8 ...
[1] "Initialization: 1"
[1] "Log-like diff: 6.74642105237781"
[1] "Log-like diff: 0.748917649480589"
[1] "Log-like diff: 0.443303248886963"
[1] "Log-like diff: 0.190917891083934"
[1] "Log-like diff: 0.073022458493746"
Running g = 9 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0.000229290797541637"
[1] "Log-like diff: 2.07037126571663e-05"
[1] "Log-like diff: 2.86282421768647e-07"
Running g = 10 ...
[1] "Initialization: 1"
[1] "Log-like diff: 4.80115819989635e-05"
[1] "Log-like diff: 6.73545858731472e-06"
Running g = 11 ...
[1] "Initialization: 1"
[1] "Log-like diff: 3.83538203863054e-07"
Running g = 12 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0.005250564818855"
[1] "Log-like diff: 0.000985161822388392"
[1] "Log-like diff: 0.000184812593015948"
[1] "Log-like diff: 3.46787723870534e-05"
[1] "Log-like diff: 6.49753503623174e-06"
$ICL
$profiles
$boxplots
$probapost_barplots
*************************************************
Model: Poisson
Transformation: none
*************************************************
Clusters fit: 2,3,4,5,6,7,8,9,10,11,12
Clusters with errors: ---
Selected number of clusters via ICL: 12
ICL of selected model: -3013888
*************************************************
Number of clusters = 12
ICL = -3013888
*************************************************
Cluster sizes:
Cluster 1 Cluster 2 Cluster 3 Cluster 4 Cluster 5 Cluster 6 Cluster 7
32 76 176 55 99 47 7
Cluster 8 Cluster 9 Cluster 10 Cluster 11 Cluster 12
23 26 60 64 172
Number of observations with MAP > 0.90 (% of total):
833 (99.52%)
Number of observations with MAP > 0.90 per cluster (% of total per cluster):
Cluster 1 Cluster 2 Cluster 3 Cluster 4 Cluster 5 Cluster 6 Cluster 7
32 75 175 55 99 46 7
(100%) (98.68%) (99.43%) (100%) (100%) (97.87%) (100%)
Cluster 8 Cluster 9 Cluster 10 Cluster 11 Cluster 12
23 25 60 64 172
(100%) (96.15%) (100%) (100%) (100%)
Class: pca dudi
Call: dudi.pca(df = log(data + 0.1), center = TRUE, scale = TRUE, scannf = FALSE,
nf = 4)
Total inertia: 24
Eigenvalues:
Ax1 Ax2 Ax3 Ax4 Ax5
19.1693 3.3531 0.5281 0.3930 0.1205
Projected inertia (%):
Ax1 Ax2 Ax3 Ax4 Ax5
79.872 13.971 2.200 1.638 0.502
Cumulative projected inertia (%):
Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
79.87 93.84 96.04 97.68 98.18
(Only 5 dimensions (out of 24) are shown)
NULL
log.data <- log2(normalized.count[sharedBy3, ] + 1)
Norm.interest.corr <- corr.test(t(log.data), method = "pearson", ci = F)
Norm.interest.corr$p[lower.tri(Norm.interest.corr$p, diag = -TRUE)] = NA
Pval.adj <- as.data.frame(as.table(Norm.interest.corr$p))
Norm.interest.corr$r[lower.tri(Norm.interest.corr$r, diag = TRUE)] = NA
Correlation <- as.data.frame(as.table(Norm.interest.corr$r))
Cor.table <- na.exclude(cbind(Correlation, Pval.adj))[, c(1, 2, 3, 6)]
colnames(Cor.table) <- c("gene1", "gene2", "cor", "p.adj")
Cor.table.filt <- Cor.table[(abs(Cor.table[, 3]) > 0.9 & Cor.table[, 4] < 0.01),
]
g <- graph.data.frame(Cor.table.filt[, 1:2], directed = -FALSE)
V(g)$color <- clusteredGenes[V(g)]
degree <- degree(g)
hist(degree, breaks = 30)Node_nw_st <- data.frame(degree, betweenness)
plot.igraph(g, vertex.size = 5, vertex.label.cex = 0.01, color = clusteredGenes)a$cluster <- clusteredGenes[a$ensembl_gene_id]
entrezID <- list()
nb_clust = max(clusteredGenes)
for (clust in seq(1:nb_clust)) {
# print(entrez[entrez$cluster == clust,]$ensembl_transcript_id)
entrezID[[length(entrezID) + 1]] <- na.omit(a[a$cluster == clust, ]$entrezgene_id)
}
names(entrezID) <- as.character(seq(1:nb_clust))
ck <- compareCluster(geneCluster = entrezID, fun = "enrichGO", OrgDb = org.At.tair.db,
ont = "BP", pAdjustMethod = "BH", pvalueCutoff = 0.01, qvalueCutoff = 0.05)
clusterProfiler::dotplot(ck, x = ~Cluster)****************************************
coseq analysis: Poisson approach & none transformation
K = 2 to 12
Use set.seed() prior to running coseq for reproducible results.
****************************************
Running g = 2 ...
[1] "Initialization: 1"
[1] "Log-like diff: 1.06819442180495e-10"
Running g = 3 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0.0758628365212921"
[1] "Log-like diff: 0.0198623147528725"
[1] "Log-like diff: 0.00525704057190701"
[1] "Log-like diff: 0.00139919104513453"
[1] "Log-like diff: 0.000341532015648127"
Running g = 4 ...
[1] "Initialization: 1"
[1] "Log-like diff: 0.0111170800072813"
[1] "Log-like diff: 0.00141139826298442"
[1] "Log-like diff: 0.000161397428641408"
[1] "Log-like diff: 2.2397745002678e-05"
[1] "Log-like diff: 2.78974421874523e-06"
Running g = 5 ...
[1] "Initialization: 1"
[1] "Log-like diff: 986.277045880157"
[1] "Log-like diff: 1238.12354241205"
[1] "Log-like diff: 834.458311448919"
[1] "Log-like diff: 535.055524516963"
[1] "Log-like diff: 353.900755196122"
Running g = 6 ...
[1] "Initialization: 1"
[1] "Log-like diff: 61.6607030025399"
[1] "Log-like diff: 220.279999620298"
[1] "Log-like diff: 343.597475596329"
[1] "Log-like diff: 118.657798253033"
[1] "Log-like diff: 89.3377189676476"
Running g = 7 ...
[1] "Initialization: 1"
[1] "Log-like diff: 1071.58883560517"
[1] "Log-like diff: 800.521834137188"
[1] "Log-like diff: 366.481058739596"
[1] "Log-like diff: 474.616622316664"
[1] "Log-like diff: 2708.0722365157"
Running g = 8 ...
[1] "Initialization: 1"
[1] "Log-like diff: 880.312874833506"
[1] "Log-like diff: 275.320585913289"
[1] "Log-like diff: 326.646768080891"
[1] "Log-like diff: 72.2925259231472"
[1] "Log-like diff: 302.279208953793"
Running g = 9 ...
[1] "Initialization: 1"
[1] "Log-like diff: 47.7096958256379"
[1] "Log-like diff: 66.4585845237957"
[1] "Log-like diff: 22.8816925488037"
[1] "Log-like diff: 314.666696217909"
[1] "Log-like diff: 181.581898960294"
Running g = 10 ...
[1] "Initialization: 1"
[1] "Log-like diff: 18.7472611407226"
[1] "Log-like diff: 4.96990848783927"
[1] "Log-like diff: 0.763933732350862"
[1] "Log-like diff: 0.120476697969336"
[1] "Log-like diff: 0.0199799576709712"
Running g = 11 ...
[1] "Initialization: 1"
[1] "Log-like diff: 479.657137925635"
[1] "Log-like diff: 520.602794556823"
[1] "Log-like diff: 257.720945661473"
[1] "Log-like diff: 160.971666718626"
[1] "Log-like diff: 12.0087493319837"
Running g = 12 ...
[1] "Initialization: 1"
[1] "Log-like diff: 621.010440786969"
[1] "Log-like diff: 740.364460866503"
[1] "Log-like diff: 532.464606735472"
[1] "Log-like diff: 50.7983333830509"
[1] "Log-like diff: 88.5679701787833"
$ICL
$profiles
$boxplots
$probapost_barplots
*************************************************
Model: Poisson
Transformation: none
*************************************************
Clusters fit: 2,3,4,5,6,7,8,9,10,11,12
Clusters with errors: ---
Selected number of clusters via ICL: 12
ICL of selected model: -3391000
*************************************************
Number of clusters = 12
ICL = -3391000
*************************************************
Cluster sizes:
Cluster 1 Cluster 2 Cluster 3 Cluster 4 Cluster 5 Cluster 6 Cluster 7
301 71 121 712 122 24 172
Cluster 8 Cluster 9 Cluster 10 Cluster 11 Cluster 12
256 104 42 307 609
Number of observations with MAP > 0.90 (% of total):
2759 (97.11%)
Number of observations with MAP > 0.90 per cluster (% of total per cluster):
Cluster 1 Cluster 2 Cluster 3 Cluster 4 Cluster 5 Cluster 6 Cluster 7
288 70 116 705 116 22 167
(95.68%) (98.59%) (95.87%) (99.02%) (95.08%) (91.67%) (97.09%)
Cluster 8 Cluster 9 Cluster 10 Cluster 11 Cluster 12
247 98 40 291 599
(96.48%) (94.23%) (95.24%) (94.79%) (98.36%)
Class: pca dudi
Call: dudi.pca(df = log(data + 0.1), center = TRUE, scale = TRUE, scannf = FALSE,
nf = 4)
Total inertia: 24
Eigenvalues:
Ax1 Ax2 Ax3 Ax4 Ax5
22.03089 1.11520 0.27107 0.11321 0.06945
Projected inertia (%):
Ax1 Ax2 Ax3 Ax4 Ax5
91.7954 4.6467 1.1295 0.4717 0.2894
Cumulative projected inertia (%):
Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
91.80 96.44 97.57 98.04 98.33
(Only 5 dimensions (out of 24) are shown)
NULL
log.data <- log2(normalized.count[sharedBy3, ] + 1)
Norm.interest.corr <- corr.test(t(log.data), method = "pearson", ci = F)
Norm.interest.corr$p[lower.tri(Norm.interest.corr$p, diag = -TRUE)] = NA
Pval.adj <- as.data.frame(as.table(Norm.interest.corr$p))
Norm.interest.corr$r[lower.tri(Norm.interest.corr$r, diag = TRUE)] = NA
Correlation <- as.data.frame(as.table(Norm.interest.corr$r))
Cor.table <- na.exclude(cbind(Correlation, Pval.adj))[, c(1, 2, 3, 6)]
colnames(Cor.table) <- c("gene1", "gene2", "cor", "p.adj")
Cor.table.filt <- Cor.table[(abs(Cor.table[, 3]) > 0.9 & Cor.table[, 4] < 0.01),
]
g <- graph.data.frame(Cor.table.filt[, 1:2], directed = -FALSE)
V(g)$color <- clusteredGenes[V(g)]
degree <- degree(g)
hist(degree, breaks = 30)Node_nw_st <- data.frame(degree, betweenness)
plot.igraph(g, vertex.size = 5, vertex.label.cex = 0.01, color = clusteredGenes)a$cluster <- clusteredGenes[a$ensembl_gene_id]
entrezID <- list()
nb_clust = max(clusteredGenes)
for (clust in seq(1:nb_clust)) {
# print(entrez[entrez$cluster == clust,]$ensembl_transcript_id)
entrezID[[length(entrezID) + 1]] <- na.omit(a[a$cluster == clust, ]$entrezgene_id)
}
names(entrezID) <- as.character(seq(1:nb_clust))
ck <- compareCluster(geneCluster = entrezID, fun = "enrichGO", OrgDb = org.At.tair.db,
ont = "BP", pAdjustMethod = "BH", pvalueCutoff = 0.01, qvalueCutoff = 0.05)
clusterProfiler::dotplot(ck, x = ~Cluster)